#######################
#### Permutation Tests
#######################
rm(list = ls())
pkg <- c("plyr", "dplyr", "tidyr", 
         "MASS", "multiwayvcov", "fracdiff",
         "fractal",
         "lme4", "ArfimaMLM","gam",
         "forecast",
         "stargazer", "lmtest", "doBy", "ggplot2", 
         "mediation", "vars", "DataCombine", "foreign",
         "mgcv", "ordinal", "reshape2", 
         "xtable", "sandwich", "rms")
lapply(pkg, require, character.only = TRUE)

# rid of E
options(scipen=999)
### setting wd for inputs
setwd("~/Dropbox/Pollution and Public Perceptions in China/Data and Analysis/Data")

# loading the data
load("ppp_cleaned")

##################################################################### 
####################### PERMUTATION TESTS #######################
##################################################################### 

##################################################################### 
########## CLUSTERED ###################
##################################################################### 


#####
# PREP
#####


# collapsing the data 
collapse1 <- summaryBy(l_sat +  econ  + l_watch +	c_watch	+	anti_west	+ 
                         l_pollute +	n_pollute	+ month_pollute	+ week_pollute 
                       + day_pollute + aqi	+ aqi.lagged + aqi.lagged2 + aqi.lagged3 +
                         parade + national ~ day, FUN=mean, na.rm=TRUE, data=p2)
collapse1 <- data.frame(collapse1)

# subset the original data to only include variables we will use 
p3 <- p2[c("day", "aqi", "aqi.lagged", "aqi.lagged2", "l_sat", "c_sat",
           "econ", "l_watch", "c_watch", "anti_west", "hukou", "educ", "soe", "ccp",
           "insider", "kids", "married", "gender", "parade", "national", "age", "income")]

# extract the AQI values from the collapsed dataset
list <- cbind(collapse1$day, collapse1$aqi.mean, collapse1$aqi.lagged.mean, collapse1$aqi.lagged2.mean)
list <- as.data.frame(list)
colnames(list)[1:4] <- c("newday", "newaqi", "newaqi.lagged", "newaqi.lagged2") # renaming the variables 
# so that merging will be easier

### now, start the permutation test

## for l_sat 
# orginal:
l_satc <- lm(l_sat
             ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
             + kids + married + income + gender + ccp + parade + national + age, data=p3)

c_satc <- lm(c_sat
             ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
             + kids + married + income + gender + ccp + parade + national + age, data=p3)

l_watchc <- lm(l_watch
               ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
               + kids + married + income + gender + ccp + parade + national + age, data=p3)

c_watchc <- lm(c_watch
               ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
               + kids + married + income + gender + ccp + parade + national + age, data=p3)

anti_westc <- lm(anti_west
                 ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                 + kids + married + income + gender + ccp + parade + national + age, data=p3)

## Begin permuting
set.seed(999)
# create a list to store the values
iter <- 1000
rep <- list()
rep.t <- list()

for (i in 1:iter) {
  
  new.index <- sample(1:length(list$newday), replace = FALSE)
  new.table <- list
  new.table$newday <- new.index
  
  p.perm <- left_join(p3, new.table, c("day" = "newday")) # merge it into the 
  # analytical data.frame  
  
  # run the temporary model
  l_sat.tmp <- lm(l_sat ~ newaqi+ newaqi.lagged + newaqi.lagged2 + hukou + insider + soe + educ 
                  + kids + married + income + gender + ccp + parade + national + age, data = p.perm)
  c_sat.tmp <- lm(c_sat ~ newaqi+ newaqi.lagged + newaqi.lagged2 + hukou + insider + soe + educ 
                  + kids + married + income + gender + ccp + parade + national + age, data = p.perm)
  l_watch.tmp <- lm(l_watch ~ newaqi+ newaqi.lagged + newaqi.lagged2 + hukou + insider + soe + educ 
                    + kids + married + income + gender + ccp + parade + national + age, data = p.perm)
  c_watch.tmp <- lm(c_watch ~ newaqi+ newaqi.lagged + newaqi.lagged2 + hukou + insider + soe + educ 
                    + kids + married + income + gender + ccp + parade + national + age, data = p.perm)
  anti_west.tmp <- lm(anti_west ~ newaqi+ newaqi.lagged + newaqi.lagged2 + hukou + insider + soe + educ 
                      + kids + married + income + gender + ccp + parade + national + age, data = p.perm)
  rep[[i]] <- c(l_sat.tmp$coefficients[2],
                c_sat.tmp$coefficients[2],
                l_watch.tmp$coefficients[2],
                c_watch.tmp$coefficients[2],
                # econ.tmp$coefficients[2],
                anti_west.tmp$coefficients[2])
  
  # save the point estimate
  rep.t[[i]] <- c(coeftest(l_sat.tmp,cluster.vcov(l_sat.tmp, p.perm$day))[2,3],
                  coeftest(c_sat.tmp,cluster.vcov(c_sat.tmp, p.perm$day))[2,3],
                  coeftest(l_watch.tmp,cluster.vcov(l_watch.tmp, p.perm$day))[2,3],
                  coeftest(c_watch.tmp,cluster.vcov(c_watch.tmp, p.perm$day))[2,3],
                  coeftest(anti_west.tmp,cluster.vcov(anti_west.tmp, p.perm$day))[2,3])
  
}

# p-values calculated with point estimates for l_sat
l_sat_pp <- 1 - mean(unlist(lapply(rep, function(x) x[1])) > l_satc$coefficients[2])
c_sat_pp <- 1 - mean(unlist(lapply(rep, function(x) x[2])) > c_satc$coefficients[2])
l_watch_pp <- mean(unlist(lapply(rep, function(x) x[3])) > l_watchc$coefficients[2])    
c_watch_pp <- mean(unlist(lapply(rep, function(x) x[4])) > c_watchc$coefficients[2])
anti_west_pp <- 1 - mean(unlist(lapply(rep, function(x) x[5])) > anti_westc$coefficients[2])

l_sat_pt <- 1- mean(unlist(lapply(rep.t, function(x) x[1])) > coeftest(l_satc,cluster.vcov(l_satc, p2$day))[2,3])
c_sat_pt <- 1- mean(unlist(lapply(rep.t, function(x) x[2])) > coeftest(c_satc,cluster.vcov(c_satc, p2$day))[2,3])
l_watch_pt <- mean(unlist(lapply(rep.t, function(x) x[3])) > coeftest(l_watchc,cluster.vcov(l_watchc, p2$day))[2,3])    
c_watch_pt <- mean(unlist(lapply(rep.t, function(x) x[4])) > coeftest(c_watchc,cluster.vcov(c_watchc, p2$day))[2,3])
anti_west_pt <- 1 - mean(unlist(lapply(rep.t, function(x) x[5])) > coeftest(anti_westc,cluster.vcov(anti_westc, p2$day))[2,3])

par(mar=c(1,1,1,1))
## FIGURE A6
setwd("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Figures")
pdf("clustered_perm_test.pdf", height=8, width=6)
par(mfrow=c(3,2))
plot(density(unlist(lapply(rep, function(x) x[1]))), main = "Local Satisfaction", 
     xlab= paste("P-value =", l_sat_pp, sep=" ")) 
abline(v=l_satc$coefficients[2], col="red")


plot(density(unlist(lapply(rep, function(x) x[2]))), main = "Central Satisfaction", 
     xlab=paste("P-value =", c_sat_pp, sep=" ")) 
abline(v=c_satc$coefficients[2], col="red")


plot(density(unlist(lapply(rep, function(x) x[3]))), main = "Local Oversight", 
     xlab=paste("P-value =", l_watch_pp, sep=" ")) 
abline(v=l_watchc$coefficients[2], col="red")


plot(density(unlist(lapply(rep, function(x) x[4]))), main = "Central Oversight", 
     xlab=paste("P-value =", c_watch_pp, sep=" ")) 
abline(v=c_watchc$coefficients[2], col="red")

plot(density(unlist(lapply(rep, function(x) x[5]))), main = "Belief in Anti-West Conspiracy", 
     xlab=paste("P-value =", anti_west_pp, sep=" ")) 
abline(v=anti_westc$coefficients[2], col="red")


dev.off()
## FIGURE A7
setwd("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Figures")
pdf("clustered_perm_test_t.pdf", height=8, width=6)
par(mfrow=c(3,2))
plot(density(unlist(lapply(rep.t, function(x) x[1]))), main = "Local Satisfaction", 
     xlab=paste("P-value =", l_sat_pt, sep=" ")) 
abline(v=coeftest(l_satc,cluster.vcov(l_satc, p2$day))[2,3], col="red")


plot(density(unlist(lapply(rep.t, function(x) x[2]))), main = "Central Satisfaction", 
     xlab=paste("P-value =", c_sat_pt, sep=" ")) 
abline(v=coeftest(c_satc,cluster.vcov(c_satc, p2$day))[2,3], col="red")


plot(density(unlist(lapply(rep.t, function(x) x[3]))), main = "Local Oversight", 
     xlab=paste("P-value =", l_watch_pt, sep=" ")) 
abline(v=coeftest(l_watchc,cluster.vcov(l_watchc, p2$day))[2,3], col="red")


plot(density(unlist(lapply(rep.t, function(x) x[4]))), main = "Central Oversight", 
     xlab=paste("P-value =", c_watch_pt, sep=" ")) 
abline(v=coeftest(c_watchc,cluster.vcov(c_watchc, p2$day))[2,3], col="red")


plot(density(unlist(lapply(rep.t, function(x) x[5]))), main = "Belief in Anti-West Conspiracy", 
     xlab=paste("P-value =", anti_west_pt, sep=" ")) 
abline(v=coeftest(anti_westc,cluster.vcov(anti_westc, p2$day))[2,3], col="red")


dev.off()


